/*
Date: October 2025
Project: Income and Child Maltreatment: Evidence from a Discontinuity in Tax Benefits
Author: Katherine Rittenhouse
Purpose: This file links death records to birth records using a pre-existing linkage.
Files in: Extract_10151_1_2021; Births_fkconsistent; Deaths_99_20
Files out: births_deaths_cw; Births_fkconsistent_deaths
*/

* Start from a clean session and load the raw record-linkage extract.
clear all
set more off

import delimited using "Extract_10151_1_2021.txt", clear

* Flag, for each side of a linked pair, whether the source is a death file
* (prefix DSMF) or a birth file (prefix BSMF).
tempvar d1 d2 b1 b2
gen byte `d1' = substr(record1_source,1,4)=="DSMF"
gen byte `d2' = substr(record2_source,1,4)=="DSMF"
gen byte `b1' = substr(record1_source,1,4)=="BSMF"
gen byte `b2' = substr(record2_source,1,4)=="BSMF"

* Keep only pairs that link a death record to a birth record, in either order.
keep if (`d1' & `b2') | (`d2' & `b1')

* Drop pairs where the death-side year is earlier than the birth-side year
* (a death cannot precede the birth year).
drop if (`d1' & record1_year < record2_year) | (`d2' & record2_year < record1_year)

drop `d1' `d2' `b1' `b2'

* Build the birth identifier bid = birth year + state id + local id.
*
* The state id is read from the id2 slot and the local id from the id1 slot of
* whichever record in the pair carries the matching *_name label. Each id is
* then normalised: 13-character values are cut down to characters 8-13, and
* values of 0-5 characters are left-padded with zeros to 6 characters.
forvalues k = 1/2 {
    local idvar  : word `k' of bcstateid bclocalid
    local slot   : word `k' of id2 id1
    local lenvar : word `k' of length lengthloc

    gen `idvar' = ""
    forvalues r = 1/2 {
        replace `idvar' = record`r'_`slot' if record`r'_`slot'_name=="`idvar'"
    }

    * Length is measured once, before any trimming or padding.
    gen `lenvar' = strlen(`idvar')
    replace `idvar' = substr(`idvar',8,6) if `lenvar'==13
    replace `idvar' = substr("000000",1,6-`lenvar') + `idvar' if `lenvar'<=5
}

* Birth year comes from whichever record's source starts with "B".
gen birthyear = .
forvalues r = 1/2 {
    replace birthyear = record`r'_year if substr(record`r'_source,1,1)=="B"
}

* Convert the year to text; a missing year becomes the placeholder "0000".
tostring birthyear, replace force
replace birthyear = "0000" if inlist(birthyear, "", ".")
gen bid = birthyear + bcstateid + bclocalid

*3 births with no bclocalid and no bcstateid
drop if bclocalid=="000000" & bcstateid=="000000"

* Check the distribution of bid lengths.
gen length2 = strlen(bid)
tabulate length2

* Build the death identifier did = death year + state id + local id.
*
* The state id is read from the id2 slot and the local id from the id1 slot of
* whichever record in the pair carries the matching *_name label. Each id is
* then normalised: 13-character values are cut down to characters 8-13, and
* values of 0-5 characters are left-padded with zeros to 6 characters.
forvalues k = 1/2 {
    local idvar  : word `k' of dcstateid dclocalid
    local slot   : word `k' of id2 id1
    local lenvar : word `k' of dlength dlengthloc

    gen `idvar' = ""
    forvalues r = 1/2 {
        replace `idvar' = record`r'_`slot' if record`r'_`slot'_name=="`idvar'"
    }

    * Length is measured once, before any trimming or padding.
    gen `lenvar' = strlen(`idvar')
    replace `idvar' = substr(`idvar',8,6) if `lenvar'==13
    replace `idvar' = substr("000000",1,6-`lenvar') + `idvar' if `lenvar'<=5
}

* Death year comes from whichever record's source starts with "D".
gen deathyear = .
forvalues r = 1/2 {
    replace deathyear = record`r'_year if substr(record`r'_source,1,1)=="D"
}

* Convert the year to text; a missing year becomes the placeholder "0000".
tostring deathyear, replace force
replace deathyear = "0000" if inlist(deathyear, "", ".")
gen did = deathyear + dcstateid + dclocalid

* Drop deaths with neither a local nor a state id.
drop if dclocalid=="000000" & dcstateid=="000000"
*0 observations


* Reduce the linkage to a one-to-one crosswalk between death ids and birth ids.
keep did bid bc* dc* match_prob
duplicates drop

* For each death id keep the link that sorts last on match_prob (the highest
* value), then do the same for each birth id.
*
* The extra sort keys after match_prob break ties deterministically. Stata
* orders tied observations randomly, so sorting on match_prob alone could keep
* a different link from one run to the next. After -duplicates drop- the rows
* are unique on the full key list, so the order below is total and the saved
* crosswalk is reproducible.
*
* NOTE(review): Stata sorts missing numeric values last, so a link with a
* missing match_prob would be kept over any non-missing one. Confirm
* match_prob is never missing in the extract.
bysort did (match_prob bid bc* dc*): keep if _n==_N
bysort bid (match_prob did bc* dc*): keep if _n==_N

save "births_deaths_cw.dta",replace

* Attach death records to the crosswalk by death id.
use "births_deaths_cw.dta", clear

merge 1:m did using "Deaths_99_20.dta"

* Discard death records that have no crosswalk entry (using-only rows).
keep if _merge != 2
drop _merge

* Attach the result to the main births file by birth id.
* NOTE(review): this 1:1 merge requires bid to be unique in memory, which holds
* only if each did matched at most one row of Deaths_99_20 above; confirm.
merge 1:1 bid using "Births_fkconsistent.dta", generate(_mdeath)

* Discard crosswalk rows with no matching birth record (master-only rows).
keep if _mdeath != 1
save "Births_fkconsistent_deaths.dta", replace






